home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The 640 MEG Shareware Studio 2
/
The 640 Meg Shareware Studio CD-ROM Volume II (Data Express)(1993).ISO
/
clang
/
nn.zip
/
DIGEST.C
< prev
next >
Wrap
C/C++ Source or Header
|
1989-12-31
|
10KB
|
425 lines
/*
* digest article handling
*/
#include "config.h"
#include "news.h"
#include "match.h"
#include "debug.h"
/*
 * TEST(fmt, x, y) - optional debug trace.
 *
 * When DG_TEST is defined, prints fmt with the two arguments x and y via
 * printf() if the DG_TEST bit is set in Debug (both presumably come from
 * debug.h -- confirm).  Otherwise it expands to an empty statement and
 * the arguments are not evaluated.
 *
 * Both variants are wrapped in do { } while (0) so that TEST(...); is
 * always exactly one statement; a bare `if' expansion would capture a
 * following `else' when the macro is used inside an unbraced if/else.
 */
#ifdef DG_TEST
#define TEST(fmt, x, y) \
    do { if (Debug & DG_TEST) printf(fmt, x, y); } while (0)
#else
#define TEST(fmt, x, y) \
    do { } while (0)
#endif
/*
* is_digest() (defined below) tests whether the global 'news' header
* is the header of a digest.  Its 'body' argument points to a
* NUL-terminated buffer containing the first part of the article.
*/
/*
 * Character class table, indexed by 7-bit ASCII code, handed to the
 * MATCH_DROP / MATCH_EQ macros in is_digest().
 *
 *   digits '0'..'9'              ->  1..10
 *   letters, upper or lower case -> 11..36 (so 'A' and 'a' share a value)
 *   '[' '\' ']' and '{' '|' '}'  -> 37..39 (pairwise shared)
 *   everything else              ->  0
 *
 * Presumably a zero entry marks a character the matcher ignores; the
 * exact semantics live in match.h -- confirm there.
 */
static char match_digest[128] = {
/*  NUL SOH STX ETX EOT ENQ ACK BEL BS  TAB NL  VT  FF  CR  SO  SI  */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/*  DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM  SUB ESC FS  GS  RS  US  */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/*  SP  !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /   */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/*  0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?   */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  0,  0,  0,  0,  0,
/*  @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O   */
     0, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
/*  P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _   */
    26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,  0,  0,
/*  `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o   */
     0, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
/*  p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~   DEL */
    26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,  0,  0
};
/* keyword searched for in the subject line by is_digest() */
static char digest_pattern[] = "digest";

/*
 * Hand the digest keyword to init_quick_match() (declared elsewhere).
 * Presumably this must run before is_digest() calls quick_match() with
 * the same pattern -- confirm against match.c.
 */
int
init_digest_parsing()
{
    init_quick_match(digest_pattern);
}
is_digest(body)
register char *body;
{
char *dpos, *quick_match();
register char *sp;
register int l;
/* articles without a subject line are not digests (per definition) */
if (news.ng_subj == NULL) return 0;
if (dpos = quick_match(news.ng_subj, digest_pattern)) {
int lgt = dpos - news.ng_subj;
int maxl = 10;
/* look for a line matching the subject */
while (*body && maxl) {
sp = news.ng_subj;
l = lgt;
if (*body == *sp && strncmp(body, sp, l) == 0)
goto ok;
while (*body && *body != NL) {
while (*sp && MATCH_DROP(match_digest, *sp)) {
if (--l == 0) goto ok;
++sp;
}
if (MATCH_DROP(match_digest, *body)) {
++body;
continue;
}
if (*sp && MATCH_EQ(match_digest, *body, *sp)) {
if (--l == 0) goto ok;
++sp;
}
++body;
}
if (*body) ++body, --maxl;
}
}
return 0;
ok:
TEST("is_digest: %s\n", news.ng_subj, 0);
return 1;
}
/*
 * Non-zero while an MMDF-style folder is being read: set by
 * dg_hdr_field() when a header line consists of four ^A characters and
 * a newline, cleared again by skip_digest_body().
 */
static int is_mmdf_folder = 0;

/*
 * dg_hdr_field() is defined `static' at the end of this file, but it is
 * named in block-scope declarations before that definition.  Without a
 * prior declaration those would give it external linkage, and ISO C
 * compilers then reject the later static definition.  Declaring it here
 * first gives every later declaration internal linkage.
 */
static char **dg_hdr_field();

/*
 * Read one article of a digest.
 *
 * Expects f to be positioned at the header of an article.
 *
 * Records the header start in digest.dg_hpos, parses the header with
 * parse_digest_header(), records the position after the header in
 * digest.dg_fpos, and then skips the body with skip_digest_body().
 * While skip_digest_body() reports an empty article (negative result)
 * the parse/skip step is repeated from the current file position.
 *
 * Returns -1 if a header could not be parsed, otherwise the
 * non-negative result of skip_digest_body() (0: no article follows,
 * 1: another article follows).
 */
int
get_digest_article(f, hdrbuf)
FILE *f;
news_header_buffer hdrbuf;
{
    /* both are defined further down in this file */
    int parse_digest_header(), skip_digest_body();
    int cont;

    digest.dg_hpos = ftell(f);
    TEST("GET DIGEST hp=%ld\n", digest.dg_hpos, 0);

    do {
        if (!parse_digest_header(f, 0, hdrbuf)) return -1;
        digest.dg_fpos = ftell(f);
        TEST("END HEADER hp=%ld fp=%ld\n", digest.dg_hpos, digest.dg_fpos);
    } while ((cont = skip_digest_body(f)) < 0);

    TEST("END BODY lp=%ld next=%ld\n", digest.dg_lpos, ftell(f));
    return cont;
}
/*
 * Bookkeeping constants for skip_digest_body().
 *
 * BACKUP_LINES is the size of the two ring buffers that remember the
 * file offset and class of the most recently read lines.  The LN_*
 * values are distinct bits, so several classes can be tested at once
 * with a single mask.
 */
#define BACKUP_LINES 50 /* remember class + offset for parsed lines */
#define LN_BLANK 0x01 /* blank line */
#define LN_DASHED 0x02 /* dash line */
#define LN_HEADER 0x04 /* (possible) header line */
#define LN_ASTERISK 0x08 /* asterisk line (near end) */
#define LN_END_OF 0x10 /* End of ... line */
#define LN_TEXT 0x20 /* unclassified line */
/*
* skip until 'Subject: ' (or End of digest) line is found
* then backup till start of header
*/
/*
* Tuning parameters:
*
* MIN_HEADER_LINES: number of known header lines that must
* be found in a block to identify a new
* header
*
* MAX_BLANKS_DASH max no of blanks on a 'dash line'
*
* MIN_DASHES min no of dashes on a 'dash line'
*
* MAX_BLANKS_ASTERISK max no of blanks on an 'asterisk line'
*
* MIN_ASTERISKS min no of asterisks on an 'asterisk line'
*
* MAX_BLANKS_END_OF max no of blanks before "End of "
*/
/*
 * Current values of the tuning parameters used by skip_digest_body().
 * Note that the MIN_DASHES and MIN_ASTERISKS tests there use a strict
 * `>' comparison, so a line needs MORE than the given number of leading
 * dashes/asterisks to qualify.
 */
#define MIN_HEADER_LINES 2 /* known header lines needed to accept a header block */
#define MAX_BLANKS_DASH 3 /* more leading blanks than this: not a dash line */
#define MIN_DASHES 16 /* leading dash run must be longer than this */
#define MAX_BLANKS_ASTERISK 1 /* more leading blanks than this: not an asterisk line */
#define MIN_ASTERISKS 10 /* leading asterisk run must be longer than this */
#define MAX_BLANKS_END_OF 1 /* at most this many blanks before "End of " */
/*
 * Read lines from f, classifying each one, until the start of the next
 * article header, an MMDF separator line, or end of file is reached.
 *
 * Side effects: counts lines in digest.dg_lines, stores a file offset in
 * digest.dg_lpos (apparently the end of the article body -- confirm
 * against the users of dg_lpos), may clear is_mmdf_folder, and
 * repositions f with fseek() when a header block is found or, outside
 * MMDF mode, when a separator line is found.
 *
 * Returns:
 *    1  a header block or MMDF separator was found after a non-empty body
 *    0  end of file was reached
 *   -1  the line count dropped to zero (empty article);
 *       get_digest_article() reacts by parsing/skipping again
 */
skip_digest_body(f)
register FILE *f;
{
/* ring buffers: start offset and LN_* class of the last BACKUP_LINES lines */
off_t backup_p[BACKUP_LINES];
int line_type[BACKUP_LINES];
/* backup_index: slot of the current line; backup_count: valid slots */
register int backup_index, backup_count;
int more_header_lines, end_or_asterisks, blanks;
char line[1024];
register char *cp;
char **dg_hdr_field();
/* step backup_index one slot backwards, wrapping around the ring */
#define decrease_index() \
if (--backup_index < 0) backup_index = BACKUP_LINES - 1
/* -1 because the index is incremented before each line is read */
backup_index = -1;
backup_count = 0;
end_or_asterisks = 0;
digest.dg_lines = 0;
/* entered after any line that ends a run of recognized header lines */
next_line:
more_header_lines = 0;
/* entered directly (count kept) after an unindented non-separator line */
next_possible_header_line:
digest.dg_lines++;
if (++backup_index == BACKUP_LINES) backup_index = 0;
if (backup_count < BACKUP_LINES) backup_count++;
/* remember where this line starts; it is plain text until classified */
backup_p[backup_index] = ftell(f);
line_type[backup_index] = LN_TEXT;
if (fgets(line, 1024, f) == NULL) {
TEST("end_of_file, bc=%d, lines=%d\n", backup_count, digest.dg_lines);
/* in MMDF mode the article simply ends where the read failed */
if (is_mmdf_folder) {
digest.dg_lpos = backup_p[backup_index];
is_mmdf_folder = 0;
return 0;
}
/* end of file => look for "****" or "End of" line */
/* if one was seen, walk back to the most recent such line */
if (end_or_asterisks)
while (--backup_count >= 0) {
--digest.dg_lines;
decrease_index();
if (line_type[backup_index] & (LN_ASTERISK | LN_END_OF)) break;
}
if (digest.dg_lines == 0) return 0;
/*
 * trim trailing separator and blank lines: move dg_lpos back one line
 * at a time until the preceding line is a header or text line
 */
while (--backup_count >= 0) {
--digest.dg_lines;
digest.dg_lpos = backup_p[backup_index];
decrease_index();
if ((line_type[backup_index] &
(LN_ASTERISK | LN_END_OF | LN_BLANK | LN_DASHED)) == 0)
break;
}
return 0; /* no article follows */
}
TEST("\n>>%-.50s ==>>", line, 0);
/* MMDF separator: a line of exactly four ^A characters */
if (line[0] == '\001' && strcmp(line, "\001\001\001\001\n") == 0) {
digest.dg_lpos = backup_p[backup_index];
/*
 * outside MMDF mode, rewind to the start of the separator line
 * (presumably so that the following header parse sees it -- confirm)
 */
if (!is_mmdf_folder) fseek(f, digest.dg_lpos, 0);
--digest.dg_lines;
is_mmdf_folder = 0;
return (digest.dg_lines <= 0) ? -1 : 1;
}
/* in MMDF mode only a separator line ends the body; skip classification */
if (is_mmdf_folder) goto next_line;
/* skip leading white space */
for (cp = line; *cp && isascii(*cp) && isspace(*cp); cp++);
if (*cp == NUL) {
TEST("BLANK", 0, 0);
line_type[backup_index] = LN_BLANK;
goto next_line;
}
/* number of leading white space characters */
blanks = cp - line;
/*
 * dash line: more than MIN_DASHES consecutive dashes, followed only by
 * dashes and white space up to the end of the line
 */
if (*cp == '-') {
if (blanks > MAX_BLANKS_DASH) goto next_line;
while (*cp == '-') cp++;
if (cp - line - blanks > MIN_DASHES) {
while (*cp && (*cp == '-' || (isascii(*cp) && isspace(*cp)))) cp++;
if (*cp == NUL) {
TEST("DASHED", 0, 0);
line_type[backup_index] = LN_DASHED;
}
}
goto next_line;
}
/* asterisk line: same test with '*' and the asterisk limits */
if (*cp == '*') {
if (blanks > MAX_BLANKS_ASTERISK) goto next_line;
while (*cp == '*') cp++;
if (cp - line - blanks > MIN_ASTERISKS) {
while (*cp && (*cp == '*' || (isascii(*cp) && isspace(*cp)))) cp++;
if (*cp == NUL) {
TEST("ASTERISK", 0, 0);
line_type[backup_index] = LN_ASTERISK;
/* remembered for the end-of-file handling above */
end_or_asterisks++;
}
}
goto next_line;
}
/* "End of ..." line with at most MAX_BLANKS_END_OF leading blanks */
if (blanks <= MAX_BLANKS_END_OF &&
*cp == 'E' && strncmp(cp, "End of ", 7) == 0) {
TEST("END_OF_", 0, 0);
line_type[backup_index] = LN_END_OF;
end_or_asterisks++;
goto next_line;
}
/* only lines without leading white space can be header lines */
if (blanks == 0) {
if (dg_hdr_field(line, 0)) {
TEST("HEADER", 0, 0);
line_type[backup_index] = LN_HEADER;
if (++more_header_lines < MIN_HEADER_LINES)
goto next_possible_header_line;
/* found block with MIN_HEADER_LINES */
/* search for beginning of header */
TEST("\nSearch for start of header\n", 0, 0);
/*
 * walk back over the preceding header/text lines, leaving f positioned
 * at the start of the earliest one; stop at the first line of another
 * class or when the ring buffer is exhausted
 */
for (;;) {
fseek(f, backup_p[backup_index], 0);
--digest.dg_lines;
if (--backup_count == 0) break;
decrease_index();
if ((line_type[backup_index] & (LN_HEADER | LN_TEXT)) == 0)
break;
}
if (digest.dg_lines == 0) {
TEST("Skipped empty article\n", 0, 0);
return -1;
}
/*
 * drop the blank and dashed lines that precede the header block:
 * dg_lpos moves back to the first of them
 */
for (;;) {
digest.dg_lpos = backup_p[backup_index];
if (--backup_count < 0) break;
decrease_index();
if ((line_type[backup_index] & (LN_BLANK | LN_DASHED)) == 0)
break;
--digest.dg_lines;
}
return (digest.dg_lines == 0) ? -1 : 1;
}
/* unindented but not a known header field: keep the running count */
goto next_possible_header_line;
}
/* indented text line */
goto next_line;
}
/*
 * Parse the header of a digest article from f.
 *
 * Clears the date/from/subject/to fields of the global 'digest' record,
 * then lets parse_header() (defined elsewhere) process the header with
 * dg_hdr_field() as the field selector; 'all' and 'hdrbuf' are passed
 * through unchanged.
 *
 * Returns 1 if a From or a Subject field was set afterwards, else 0.
 */
int
parse_digest_header(f, all, hdrbuf)
FILE *f;
int all;
news_header_buffer hdrbuf;
{
    extern char *parse_header();
    extern char **dg_hdr_field();

    digest.dg_date = NULL;
    digest.dg_from = NULL;
    digest.dg_subj = NULL;
    digest.dg_to = NULL;

    parse_header(f, dg_hdr_field, all, hdrbuf);

    if (digest.dg_from != NULL)
        return 1;
    return digest.dg_subj != NULL;
}
/*
 * Map a header line to the field of the global 'digest' record that it
 * belongs to.
 *
 * lp points at the start of the line; the first character is matched in
 * either case, the rest of the field name exactly:
 *
 *   "Date: "           -> dg_date
 *   "From: "           -> dg_from
 *   "Subject"          -> dg_subj   (no ": " required)
 *   "Title: "          -> dg_subj
 *   "Re: "             -> dg_subj   (only when 'all' is non-zero)
 *   "To: "             -> dg_to     (only when 'all' is non-zero)
 *
 * A line of four ^A characters plus newline, seen while is_mmdf_folder
 * is clear, switches MMDF mode on and advances digest.dg_hpos by 5 (the
 * length of that line); no field is returned for it.
 *
 * Returns the address of the selected field, or NULL if the line is not
 * one of the recognized fields.
 */
static char **dg_hdr_field(lp, all)
register char *lp;
int all;
{
    register char *rest = lp + 1;	/* text after the first character */
    char **field = NULL;

    TEST("\nPARSE[%.20s] ==>> ", lp, 0);

    switch (*lp) {
    case '\001':
        if (!is_mmdf_folder && strncmp(rest, "\001\001\001\n", 4) == 0) {
            is_mmdf_folder = 1;
            digest.dg_hpos += 5;
            return NULL;
        }
        break;

    case 'D':
    case 'd':
        if (strncmp("ate: ", rest, 5) == 0)
            field = &digest.dg_date;
        break;

    case 'F':
    case 'f':
        if (strncmp("rom: ", rest, 5) == 0)
            field = &digest.dg_from;
        break;

    case 'R':
    case 'r':
        if (all && strncmp("e: ", rest, 3) == 0)
            field = &digest.dg_subj;
        break;

    case 'S':
    case 's':
        if (strncmp("ubject", rest, 6) == 0)
            field = &digest.dg_subj;
        break;

    case 'T':
    case 't':
        if (strncmp("itle: ", rest, 6) == 0)
            field = &digest.dg_subj;
        else if (all && strncmp("o: ", rest, 3) == 0)
            field = &digest.dg_to;
        break;
    }

    if (field != NULL) {
        TEST("MATCH: field ", 0, 0);
        return field;
    }

    TEST("NOT MATCHED ", 0, 0);
    return NULL;
}